[RISCV] Match (ext (op a, b)) to (wop a, b) #137508
Conversation
This suboptimal case was found when trying to optimize the ABD/ABDS operations. Adding ISel patterns is the simplest way to optimize it. We could add DAGCombine cases for `ISD::SIGN_EXTEND`/`ISD::ZERO_EXTEND` instead, but that may need a lot of manual handling.
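For illustration, here is a minimal IR sketch (my own example, not taken from the patch's test file) of the shape the new patterns match: a narrow binary op whose result is immediately extended to the next-wider element type, which currently selects to a separate op plus `vzext.vf2`/`vsext.vf2` and can instead select a single widening instruction such as `vwsubu.vv`:

```llvm
; Hypothetical example (not part of this patch's tests): a narrow subtract
; whose result is zero-extended to the next-wider element type.
define <vscale x 8 x i16> @zext_of_sub(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
  %sub = sub <vscale x 8 x i8> %a, %b
  %ext = zext <vscale x 8 x i8> %sub to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %ext
}
```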
@llvm/pr-subscribers-backend-risc-v

Author: Pengcheng Wang (wangpc-pp)

Changes

This suboptimal case was found when trying to optimize the ABD/ABDS operations. Adding ISel patterns is the simplest way to optimize it. We could add DAGCombine cases for `ISD::SIGN_EXTEND`/`ISD::ZERO_EXTEND` instead, but that may need a lot of manual handling.

Full diff: https://github.com/llvm/llvm-project/pull/137508.diff

2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index aea125c5348dd..55a5109b7ecfa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -467,6 +467,17 @@ multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, PatFrags exto
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
+ if !eq(extop1, extop2) then
+ def : Pat<(wti.Vector (extop1 (op (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(wti.Vector (extop1 (op (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
index 583d872238df7..b8d95bd95df8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -58,10 +58,8 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmin.vv v10, v8, v9
-; CHECK-NEXT: vmax.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmax.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -91,10 +89,8 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmin.vv v10, v8, v9
-; CHECK-NEXT: vmax.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmax.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -124,10 +120,8 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmin.vv v10, v8, v9
-; CHECK-NEXT: vmax.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmax.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -192,10 +186,8 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vminu.vv v10, v8, v9
-; CHECK-NEXT: vmaxu.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmaxu.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -225,10 +217,8 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vminu.vv v10, v8, v9
-; CHECK-NEXT: vmaxu.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmaxu.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -258,10 +248,8 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vminu.vv v10, v8, v9
-; CHECK-NEXT: vmaxu.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmaxu.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -296,11 +284,9 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vminu.vv v8, v10, v9
-; CHECK-NEXT: vmaxu.vv v9, v10, v9
-; CHECK-NEXT: vsub.vv v10, v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vminu.vv v11, v10, v9
+; CHECK-NEXT: vmaxu.vv v10, v10, v9
+; CHECK-NEXT: vwsubu.vv v8, v10, v11
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
If there is a risk of overflow, is it correct to match `(ext (op a, b))` to `(wop a, b)`?
Yes, you are right. This transformation is not correct.
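For example (a scalar sketch of my own, not taken from this thread), with `a = 0` and `b = 1` the narrow subtract wraps, so extending after the subtract and subtracting the extended operands (which is what `vwsubu.vv` computes) give different results:

```llvm
; a = 0, b = 1: %sub = -1 (0xff), so %ext = 255
define i16 @narrow_sub_then_zext(i8 %a, i8 %b) {
  %sub = sub i8 %a, %b
  %ext = zext i8 %sub to i16
  ret i16 %ext
}

; a = 0, b = 1: %sub = 65535, which is what vwsubu.vv would produce
define i16 @zext_then_sub(i8 %a, i8 %b) {
  %a.ext = zext i8 %a to i16
  %b.ext = zext i8 %b to i16
  %sub = sub i16 %a.ext, %b.ext
  ret i16 %sub
}
```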
Is there a generic mechanism to analyze whether the result won't overflow? Apparently this holds for the ABD cases, but how do we detect such cases?
If you just want to optimize ABD, you can combine ...
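As an aside (my own illustration; the comment above is truncated in this capture), the unsigned ABD shape itself does not have the overflow problem: the subtrahend is `umin` and the minuend is `umax`, so the narrow subtract can never wrap, and zero-extending its result equals subtracting the zero-extended operands:

```llvm
declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
declare <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)

; Unsigned ABD: umax >= umin, so the i8 subtract cannot wrap and
; zext(%sub) equals zext(%max) - zext(%min).
define <vscale x 8 x i16> @uabd_shape(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
  %min = call <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b)
  %max = call <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b)
  %sub = sub <vscale x 8 x i8> %max, %min
  %ext = zext <vscale x 8 x i8> %sub to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %ext
}
```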